package com.huan.hadoop.mr;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;
import java.util.regex.Pattern;

/**
 * Map step: reads each input line, extracts the web-site column, and emits it as the
 * output key with the unmodified input line as the output value.
 *
 * <p>Lines that do not contain enough whitespace-separated columns to hold a web-site
 * value are skipped and counted under the {@code PartitionMapper / MALFORMED_ROWS}
 * counter instead of failing the task.
 *
 * @author huan.fu
 * @date 2023/7/12 - 20:58
 */
public class PartitionMapper extends Mapper<LongWritable, Text, Text, Text> {

    /** Column separator: one or more whitespace characters. Compiled once, reused per record. */
    private static final Pattern WHITESPACE = Pattern.compile("\\s+");

    /** Zero-based index of the web-site column within a split row. */
    private static final int WEBSITE_COLUMN = 3;

    /** Counter group used to report rows that could not be parsed. */
    private static final String COUNTER_GROUP = "PartitionMapper";

    /** Counter name used to report rows that could not be parsed. */
    private static final String MALFORMED_ROWS = "MALFORMED_ROWS";

    /** Reused output key, so a new {@link Text} is not allocated for every record. */
    private final Text outKey = new Text();

    /**
     * Emits {@code (webSite, originalLine)} for one input line.
     *
     * @param key     input key supplied by the framework; not used by this mapper
     * @param value   one line of input, e.g.
     *                {@code 1	13736230513	192.196.100.1	www.baidu.com	2481	24681	200}
     * @param context task context used to write the output pair and update counters
     * @throws IOException          if writing the output pair fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, Text>.Context context) throws IOException, InterruptedException {
        // Split the row, e.g. [1, 13736230513, 192.196.100.1, www.baidu.com, 2481, 24681, 200].
        String[] cells = WHITESPACE.split(value.toString());

        // A blank or truncated line has no web-site column; indexing it would throw
        // ArrayIndexOutOfBoundsException and fail the whole task, so skip and count it.
        if (cells.length <= WEBSITE_COLUMN) {
            context.getCounter(COUNTER_GROUP, MALFORMED_ROWS).increment(1);
            return;
        }

        // The web site becomes the output key.
        outKey.set(cells[WEBSITE_COLUMN]);

        // Emit the original line unchanged as the value.
        context.write(outKey, value);
    }
}
